{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import xgboost as xgb\n",
    "import sys,random\n",
    "import pickle\n",
    "import os\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Make sure the per-model prediction directory exists.\n",
    "# exist_ok=True keeps the cell safe to re-run and avoids the check-then-create race;\n",
    "# it still raises if the path exists but is not a directory.\n",
    "os.makedirs('train_33465_preds', exist_ok=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the feature-score table; only its 'feature' column is used below.\n",
    "feature_scores = pd.read_csv('./rank_d_feature_score.csv')\n",
    "# Take the rows at positions 10..809 and keep their feature names as a plain list.\n",
    "selected_rows = feature_scores.iloc[10:810]\n",
    "train_x_d_cols = list(selected_rows.feature)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "\n",
    "# Load the unlabeled feature tables.\n",
    "# The null, int and rank_nd tables drop their 'id' column (the int table also drops 'tag');\n",
    "# the rank_d table is read with only the columns listed in train_x_d_cols.\n",
    "# unlabel_date is kept whole, so its first column becomes column 0 of the joined frame.\n",
    "unlabel_date = pd.read_csv('../../preprocess_data/unlabel_x_date.csv')\n",
    "unlabel_null = pd.read_csv('../../preprocess_data/unlabel_x_filter_null.csv').drop(columns=['id'])\n",
    "unlabel_int = pd.read_csv('../../preprocess_data/unlabel_x_int.csv').drop(columns=['id','tag'])\n",
    "unlabel_d = pd.read_csv('../../preprocess_data/unlabel_x_float_rank_d.csv',usecols=train_x_d_cols)\n",
    "unlabel_nd = pd.read_csv('../../preprocess_data/unlabel_x_float_rank_nd.csv').drop(columns=['id'])\n",
    "# Join the tables side by side; the order of the list fixes the column positions.\n",
    "unlabel = pd.concat([unlabel_date,unlabel_null,unlabel_int,unlabel_d,unlabel_nd],axis=1,ignore_index=True,copy=False)\n",
    "# After pd.concat with ignore_index=True the feature names are gone; the columns are numbered instead.\n",
    "# Column 0 is used as the row id -- presumably the 'id' column of unlabel_date; verify against the CSV.\n",
    "test_id = unlabel.iloc[:,0].values\n",
    "# Every column except column 0 goes into the DMatrix as a feature.\n",
    "# NOTE(review): the column order presumably has to match the order used at training time -- confirm.\n",
    "test = unlabel.drop(0,axis=1)\n",
    "dtest = xgb.DMatrix(test.values)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "def pipeline(iteration,dtest,test_id):\n",
    "    \"\"\"Score the test set with saved model number `iteration` and write the scores to CSV.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    iteration : int\n",
    "        Model index; selects './model/xgb{iteration}.model' and names the\n",
    "        output file 'xgb{iteration}.csv'.\n",
    "    dtest : xgb.DMatrix\n",
    "        Test features to predict on.\n",
    "    test_id : array-like\n",
    "        Row ids, written to the 'id' column next to the predicted scores.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    None\n",
    "        The result is written to './train_33465_preds/xgb{iteration}.csv'.\n",
    "    \"\"\"\n",
    "    # Load the booster that belongs to this iteration. The index used to be\n",
    "    # hard-coded to 0, so every output file held model 0's predictions.\n",
    "    bst = xgb.Booster(model_file='./model/xgb{0}.model'.format(iteration))\n",
    "\n",
    "    # Predict the test set.\n",
    "    test_y = bst.predict(dtest)\n",
    "    test_result = pd.DataFrame(test_id, columns=['id'])\n",
    "    test_result['score'] = test_y\n",
    "\n",
    "    # Write into 'train_33465_preds', the directory this notebook creates and\n",
    "    # averages from (the old path pointed at 'train_33465_preds_preds').\n",
    "    out_path = './train_33465_preds/xgb{0}.csv'.format(iteration)\n",
    "    test_result.to_csv(out_path, index=False, encoding='utf-8')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 7min 17s, sys: 21.3 s, total: 7min 38s\n",
      "Wall time: 22.3 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# Call pipeline() for iteration indices 0..35; each call writes one prediction CSV.\n",
    "n_iterations = 36\n",
    "for model_idx in range(n_iterations):\n",
    "    pipeline(iteration=model_idx, dtest=dtest, test_id=test_id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Average the per-model score files in train_33465_preds into one prediction file.\n",
    "pred_dir = 'train_33465_preds'\n",
    "# Keep only CSV files (skips stray entries such as checkpoint folders) and\n",
    "# sort them so the run is deterministic.\n",
    "files = sorted(name for name in os.listdir(pred_dir) if name.endswith('.csv'))\n",
    "if not files:\n",
    "    raise FileNotFoundError('no prediction CSV files found in %s' % pred_dir)\n",
    "\n",
    "first_pred = pd.read_csv(os.path.join(pred_dir, files[0]))\n",
    "# Copy so the running sum does not modify first_pred in place.\n",
    "score_sum = first_pred['score'].copy()\n",
    "for name in files[1:]:\n",
    "    # Read the current file. The loop used to re-read files[0] on every pass,\n",
    "    # so the 'average' was just the first file's scores.\n",
    "    score_sum += pd.read_csv(os.path.join(pred_dir, name))['score']\n",
    "\n",
    "# Assumes every file lists the same ids in the same row order -- TODO confirm.\n",
    "avg_pred = pd.DataFrame({'id': first_pred['id'], 'score': score_sum / len(files)},\n",
    "                        columns=['id', 'score'])\n",
    "avg_pred.to_csv('./train_33465_avg_pred.csv', index=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
